load data

# voom adjusted log2 RPKM, TMM normalized and batch effect adjusted 
#load("../rdas/HCR.ribo.log2RPKM.TMM.batchAdjusted.RData")
#load("../rdas/HCR.ribo.voom.weights.RData")
#load("../rdas/HCR.RNA.log2RPKM.TMM.batchAdjusted.RData")
#load("../rdas/HCR.RNA.voom.weights.RData")

# TMM normalized log2 SILAC ratio
#load("../rdas/HCR.protein.TMM.RData")

subset data to include only genes and samples quantified in protein data

# # remove data from reference 19238 line and save as ribo.ref 
# ribo.ref <- ribo.batch.removed[,11]
# ribo.data <- ribo.batch.removed[,-11]
# ribo.weights <- ribo.voom.weights[,-11]
# 
# RNA.ref <- RNA.batch.removed[,11]
# RNA.data <- RNA.batch.removed[,-11]
# RNA.weights <- RNA.voom.weights[,-11]
# 
# # subset by rownames
# 
# # find genes that are qunatified in all three data types 
# RNA.ribo.expressed <- intersect(rownames(RNA.data),  rownames(ribo.data)) 
# 
# all.expressed<- intersect(RNA.ribo.expressed,rownames(HCR.protein.TMM.norm.ESNGlabeled))
# 
# # number of genes quantified in all three data types
# length(all.expressed)
# 
# 
# ribo.expressed.data<-ribo.data[rownames(ribo.data) %in% all.expressed,]
# ribo.expressed.weights<-ribo.weights[rownames(ribo.weights) %in% all.expressed,]
# ribo.expressed.ref <- ribo.ref[names(ribo.ref) %in% all.expressed]
# 
# 
# RNA.expressed.data<-RNA.data[rownames(RNA.data) %in% all.expressed,]
# RNA.expressed.weights<-RNA.weights[rownames(RNA.weights) %in% all.expressed,]
# RNA.expressed.ref <- RNA.ref[names(RNA.ref) %in% all.expressed]
# 
# 
# protein.expressed.data<- HCR.protein.TMM.norm.ESNGlabeled[rownames(HCR.protein.TMM.norm.ESNGlabeled) %in% all.expressed,]
# 
# expressed.gene.names <- as.character(protein.expressed.data[,16])
# names(expressed.gene.names) <- rownames(protein.expressed.data)
# 
# 
# protein.expressed.data <- protein.expressed.data[,-16]

######
#save sup file S4 as an R object

#protein.expressed.data <- as.matrix(protein.expressed.data)

# save(list = c("ribo.expressed.data",
# "ribo.expressed.weights",
# "ribo.expressed.ref",
# "RNA.expressed.data",
# "RNA.expressed.weights",
# "RNA.expressed.ref",
# "protein.expressed.data"), file = "../../tables/fileS4.RData")

######
load("../tables/fileS4.RData")

test TE divergence between speciese

# TE
# limma interaction model

library(limma)
## Warning: package 'limma' was built under R version 3.4.2
library(qvalue)

RNA.ribo.combined <- cbind(RNA.expressed.data, ribo.expressed.data)
RNA.ribo.weights <- cbind(RNA.expressed.weights, ribo.expressed.weights)

species.label <- substring(colnames(RNA.ribo.combined),1,1)
phenotype.label <- c(rep("rna",15),rep("ribo",15))

design <- model.matrix(~species.label*phenotype.label)
colnames(design)<-c("Int","Human","Rhesus","RNA","Human.RNA","Rhesus.RNA")


TE.voom.fit <- lmFit(RNA.ribo.combined,design = design,weights = RNA.ribo.weights)
HR.contrast <- makeContrasts(Human.RNA-Rhesus.RNA,levels = design)
TE.voom.HR.fit <- contrasts.fit(TE.voom.fit,HR.contrast)

TE.voom.fit <- eBayes(TE.voom.fit)
TE.voom.HR.fit <- eBayes(TE.voom.HR.fit)

# since default limma setting parameterized ribo as reference datatype,which results in a beta estimate of 1/TE. Instead of specifying RNA as the reference datatype (i.e. beta will be species difference of ribo/RNA) I decide to simply take the inverse of the coefficient to estimate TE (i.e. negate the coefficinet in log space)

HvC.interaction.TE.effect<- -TE.voom.fit$coefficient[,5]
RvC.interaction.TE.effect<- -TE.voom.fit$coefficient[,6]
HvR.interaction.TE.effect <- -TE.voom.HR.fit$coefficient[,1]

HvC.interaction.TE.pval<-TE.voom.fit$p.value[,5]
RvC.interaction.TE.pval<-TE.voom.fit$p.value[,6]
HvR.interaction.TE.pval<-TE.voom.HR.fit$p.value[,1]



hist(HvC.interaction.TE.pval,breaks = 100)

hist(RvC.interaction.TE.pval,breaks = 100)

hist(HvR.interaction.TE.pval,breaks = 100)

length(which(p.adjust(HvC.interaction.TE.pval,method = "bonferroni") < 0.05))
## [1] 23
length(which(p.adjust(RvC.interaction.TE.pval,method = "bonferroni") < 0.05))
## [1] 35
length(which(p.adjust(HvR.interaction.TE.pval,method = "bonferroni") < 0.05))
## [1] 69
diverged.TE <- cbind(p.adjust(HvC.interaction.TE.pval,method = "bonferroni") < 0.05,p.adjust(RvC.interaction.TE.pval,method = "bonferroni") < 0.05,p.adjust(HvR.interaction.TE.pval,method = "bonferroni") < 0.05)

colnames(diverged.TE) <- c("HvC", "RvC", "HvR")

vennDiagram(diverged.TE)

######
#save TE test restuls to sup file S5


#TE.results <- cbind(HvC.interaction.TE.effect,RvC.interaction.TE.effect,HvR.interaction.TE.effect,HvC.interaction.TE.pval,RvC.interaction.TE.pval,HvR.interaction.TE.pval, qvalue(HvC.interaction.TE.pval)$qvalues, qvalue(RvC.interaction.TE.pval)$qvalues, qvalue(HvR.interaction.TE.pval)$qvalues, p.adjust(HvC.interaction.TE.pval,method = "bonferroni"), p.adjust(RvC.interaction.TE.pval,method = "bonferroni"), p.adjust(HvR.interaction.TE.pval,method = "bonferroni"))


#colnames(TE.results) <- c("HvC.beta","RvC.beta","HvR.beta","HvC.pval","RvC.pval","HvR.pval","HvC.FDR","RvC.FDR","HvR.FDR","HvC.FWER","RvC.FWER","HvR.FWER")

#write.csv(TE.results,"../../tables/FileS5.csv",quote = FALSE,row.names = TRUE)



######

test RNA divergence between speciese

# RNA

species.label <- substring(colnames(RNA.expressed.data),1,1)

design <- model.matrix(~species.label)
colnames(design)<-c("Int","Human","Rhesus")


RNA.voom.fit <- lmFit(RNA.expressed.data,design = design,weights = RNA.expressed.weights)
HR.contrast <- makeContrasts(Human-Rhesus,levels = design)
RNA.voom.HR.fit <- contrasts.fit(RNA.voom.fit,HR.contrast)

RNA.voom.fit <- eBayes(RNA.voom.fit)
RNA.voom.HR.fit <- eBayes(RNA.voom.HR.fit)



HvC.RNA.effect<-RNA.voom.fit$coefficient[,2]
RvC.RNA.effect<-RNA.voom.fit$coefficient[,3]
HvR.RNA.effect <- RNA.voom.HR.fit$coefficient[,1]

HvC.RNA.pval<-RNA.voom.fit$p.value[,2]
RvC.RNA.pval<-RNA.voom.fit$p.value[,3]
HvR.RNA.pval<-RNA.voom.HR.fit$p.value[,1]



hist(HvC.RNA.pval,breaks = 100)

hist(RvC.RNA.pval,breaks = 100)

hist(HvR.RNA.pval,breaks = 100)

length(which(p.adjust(HvC.RNA.pval,method = "bonferroni") < 0.05))
## [1] 170
length(which(p.adjust(RvC.RNA.pval,method = "bonferroni") < 0.05))
## [1] 351
length(which(p.adjust(HvR.RNA.pval,method = "bonferroni") < 0.05))
## [1] 419

test ribo divergence between speciese

# ribo

species.label <- substring(colnames(ribo.expressed.data),1,1)

design <- model.matrix(~species.label)
colnames(design)<-c("Int","Human","Rhesus")


ribo.voom.fit <- lmFit(ribo.expressed.data,design = design,weights = ribo.expressed.weights)
HR.contrast <- makeContrasts(Human-Rhesus,levels = design)
ribo.voom.HR.fit <- contrasts.fit(ribo.voom.fit,HR.contrast)

ribo.voom.fit <- eBayes(ribo.voom.fit)
ribo.voom.HR.fit <- eBayes(ribo.voom.HR.fit)



HvC.ribo.effect<-ribo.voom.fit$coefficient[,2]
RvC.ribo.effect<-ribo.voom.fit$coefficient[,3]
HvR.ribo.effect <- ribo.voom.HR.fit$coefficient[,1]

HvC.ribo.pval<-ribo.voom.fit$p.value[,2]
RvC.ribo.pval<-ribo.voom.fit$p.value[,3]
HvR.ribo.pval<-ribo.voom.HR.fit$p.value[,1]



hist(HvC.ribo.pval,breaks = 100)

hist(RvC.ribo.pval,breaks = 100)

hist(HvR.ribo.pval,breaks = 100)

length(which(p.adjust(HvC.ribo.pval,method = "bonferroni") < 0.05))
## [1] 174
length(which(p.adjust(RvC.ribo.pval,method = "bonferroni") < 0.05))
## [1] 474
length(which(p.adjust(HvR.ribo.pval,method = "bonferroni") < 0.05))
## [1] 465
diverged.ribo <- cbind(p.adjust(HvC.ribo.pval,method = "bonferroni") < 0.05,p.adjust(RvC.ribo.pval,method = "bonferroni") < 0.05,p.adjust(HvR.ribo.pval,method = "bonferroni") < 0.05)

colnames(diverged.ribo) <- c("HvC", "RvC", "HvR")

vennDiagram(diverged.ribo)

estimate TE coefficient for human and chimp

# chimp
chimp.RNA.ribo.data <- cbind(RNA.expressed.data[,1:5],ribo.expressed.data[,1:5])
chimp.RNA.ribo.weights <- cbind(RNA.expressed.weights[,1:5],ribo.expressed.weights[,1:5])
phenotype.label <- c(rep("rna",5),rep("ribo",5))
design <- model.matrix(~phenotype.label)
colnames(design)<-c("Int","RNA")

chimp.TE.voom.fit <- lmFit(chimp.RNA.ribo.data,design = design,weights = chimp.RNA.ribo.weights)

chimp.TE <- -chimp.TE.voom.fit$coefficient[,2]

# human

human.RNA.ribo.data <- cbind(RNA.expressed.data[,6:10],ribo.expressed.data[,6:10])
human.RNA.ribo.weights <- cbind(RNA.expressed.weights[,6:10],ribo.expressed.weights[,6:10])
phenotype.label <- c(rep("rna",5),rep("ribo",5))
design <- model.matrix(~phenotype.label)
colnames(design)<-c("Int","RNA")

human.TE.voom.fit <- lmFit(human.RNA.ribo.data,design = design,weights = human.RNA.ribo.weights)

human.TE <- -human.TE.voom.fit$coefficient[,2]

human.TE.voom.fit <- eBayes(human.TE.voom.fit)

human.TE.pval<-human.TE.voom.fit$p.value[,2]

#rhesus

rhesus.RNA.ribo.data <- cbind(RNA.expressed.data[,11:15],ribo.expressed.data[,11:15])
rhesus.RNA.ribo.weights <- cbind(RNA.expressed.weights[,11:15],ribo.expressed.weights[,11:15])
phenotype.label <- c(rep("rna",5),rep("ribo",5))
design <- model.matrix(~phenotype.label)
colnames(design)<-c("Int","RNA")

rhesus.TE.voom.fit <- lmFit(rhesus.RNA.ribo.data,design = design,weights = rhesus.RNA.ribo.weights)

rhesus.TE <- -rhesus.TE.voom.fit$coefficient[,2]

Scatter plot of TE comparing between species with significant genes highlighted

library(ggplot2)


# human vs chimp TE divergence plot for a main figure

HvC.FWER<-p.adjust(HvC.interaction.TE.pval,method = "bonferroni")

#HvC.qval<- HvC.interaction.TE.qval$qvalues

# color by FWER value < 0.05
point.color <- cut(HvC.FWER,breaks = c (0,0.05,1), labels = c("blue","grey50"))

#point.color <- cut(HvC.qval,breaks = c (0,0.01,1), labels = c("blue","grey50"))


ggplot(mapping = aes(x=HvC.interaction.TE.effect,y=-log10(HvC.interaction.TE.pval)))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(human/chimpanzee)")+ylab("-log10(P-value)")+ggtitle("Differential TE: human vs. chimpanzee")+scale_x_continuous(limits = c(-10, 10)) +theme_bw()

ggplot(mapping = aes(x=HvC.RNA.effect,y=HvC.ribo.effect))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(human/chimpanzee) RNA")+ylab("log2(human/chimpanzee) ribo")+ggtitle("Differential TE: human vs. chimpanzee")+scale_x_continuous(limits = c(-10, 10))+scale_y_continuous(limits = c(-10, 10))+theme_bw()+geom_abline(slope = 1, intercept = 0)

r.squared<-format(cor(human.TE,chimp.TE)^2, digits = 2)

F2a <- ggplot(mapping = aes(x=human.TE,y=chimp.TE))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(TE) human")+ylab("log2(TE) chimpanzee")+ggtitle("Translation efficiency")+scale_x_continuous(limits = c(-8, 8))+scale_y_continuous(limits = c(-8, 8))+theme_bw()+geom_abline(slope = 1, intercept = 0)+annotate(geom="text",x=5,y=-5,label=paste("r^2 =",r.squared))

F2a
## Warning: Removed 1 rows containing missing values (geom_point).

# ######
# #save figure 2a as pdf
# pdf("../../figures/Fig2a.pdf", width = 4, height = 4)
# F2a
# dev.off()
# 
# ######

# plot out example TE diverged genes between human and chimp 
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.4.3
TE.diverged <- cbind(RNA.expressed.data[HvC.FWER<0.05,1:10],ribo.expressed.data[HvC.FWER<0.05,1:10])
TE.dverged.genes <- as.data.frame(t(TE.diverged))

gene.selected<-melt(TE.dverged.genes)
## No id variables; using all as measure variables
gene.selected$species <- rep(c("chimp","human"),c(5,5))
gene.selected$datatype <- rep(c("RNA-seq","ribo-seq"),c(10,10)) 
names(gene.selected)<-c("ENSGID","log2RPKM","species","data type")

gene.selected  %>% ggplot(aes(x=species,y=log2RPKM))+geom_point(aes(color = `data type`), size=2.5) + scale_colour_manual(values = c("royal blue","dark red"))+theme_bw(base_size = 15)+facet_wrap(~ENSGID)+theme(legend.key = element_blank())

# plot only PFN1

F2b <- gene.selected %>% filter(ENSGID == "ENSG00000108518") %>% ggplot(aes(x=species,y=log2RPKM))+geom_point(aes(color = `data type`), size=2.5) + scale_colour_manual(values = c("royal blue","dark red"))+theme_bw(base_size = 15)+ggtitle("PFN1")+ylim(c(3,13))+theme(legend.key = element_blank())
## Warning: package 'bindrcpp' was built under R version 3.4.4
F2b

# ######
# #save figure 2b as pdf
# pdf("../../figures/Fig2b.pdf", width = 6, height = 4)
# F2b
# dev.off()
# 
# ######

## plotting other 2 pairwise comparison for supplemental figs

# human vs rhesus TE divergence plot for a main figure

HvR.FWER<-p.adjust(HvR.interaction.TE.pval,method = "bonferroni")

#HvR.qval<- HvR.interaction.TE.qval$qvalues

# color by FWER value < 0.05
point.color <- cut(HvR.FWER,breaks = c (0,0.05,1), labels = c("blue","grey50"))

#point.color <- cut(HvR.qval,breaks = c (0,0.01,1), labels = c("blue","grey50"))


ggplot(mapping = aes(x=HvR.interaction.TE.effect,y=-log10(HvR.interaction.TE.pval)))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(human/rhesus)")+ylab("-log10(P-value)")+ggtitle("Differential TE: human vs. rhesus")+scale_x_continuous(limits = c(-10, 10)) +theme_bw()
## Warning: Removed 1 rows containing missing values (geom_point).

ggplot(mapping = aes(x=HvR.RNA.effect,y=HvR.ribo.effect))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(human/rhesus) RNA")+ylab("log2(human/rhesus) ribo")+ggtitle("Differential TE: human vs. rhesus")+scale_x_continuous(limits = c(-10, 10))+scale_y_continuous(limits = c(-10, 10))+theme_bw()+geom_abline(slope = 1, intercept = 0)
## Warning: Removed 1 rows containing missing values (geom_point).

r.squared<-format(cor(human.TE,rhesus.TE)^2, digits = 2)

F2S1a <- ggplot(mapping = aes(x=human.TE,y=rhesus.TE))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(TE) human")+ylab("log2(TE) rhesus")+ggtitle("Translation efficiency")+scale_x_continuous(limits = c(-8, 8))+scale_y_continuous(limits = c(-8, 8))+theme_bw()+geom_abline(slope = 1, intercept = 0)+annotate(geom="text",x=5,y=-5,label=paste("r^2 =",r.squared))

F2S1a
## Warning: Removed 2 rows containing missing values (geom_point).

# ######
# #save figure 2a as pdf
# pdf("../../figures/Fig2S1a.pdf", width = 4, height = 4)
# F2S1a
# dev.off()
# 
# ######

# chimp vs rhesus TE divergence plot for a main figure

RvC.FWER<-p.adjust(RvC.interaction.TE.pval,method = "bonferroni")

#RvC.qval<- RvC.interaction.TE.qval$qvalues

# color by FWER value < 0.05
point.color <- cut(RvC.FWER,breaks = c (0,0.05,1), labels = c("blue","grey50"))

#point.color <- cut(RvC.qval,breaks = c (0,0.01,1), labels = c("blue","grey50"))


ggplot(mapping = aes(x=RvC.interaction.TE.effect,y=-log10(RvC.interaction.TE.pval)))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(chimp/rhesus)")+ylab("-log10(P-value)")+ggtitle("Differential TE: chimp vs. rhesus")+scale_x_continuous(limits = c(-10, 10)) +theme_bw()

ggplot(mapping = aes(x=RvC.RNA.effect,y=RvC.ribo.effect))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(chimp/rhesus) RNA")+ylab("log2(chimp/rhesus) ribo")+ggtitle("Differential TE: chimp vs. rhesus")+scale_x_continuous(limits = c(-10, 10))+scale_y_continuous(limits = c(-10, 10))+theme_bw()+geom_abline(slope = 1, intercept = 0)
## Warning: Removed 1 rows containing missing values (geom_point).

r.squared<-format(cor(chimp.TE,rhesus.TE)^2, digits = 2)

F2S1b <- ggplot(mapping = aes(x=chimp.TE,y=rhesus.TE))+geom_point(size=1, alpha=0.5, col=point.color)+ xlab("log2(TE) chimpanzee")+ylab("log2(TE) rhesus")+ggtitle("Translation efficiency")+scale_x_continuous(limits = c(-8, 8))+scale_y_continuous(limits = c(-8, 8))+theme_bw()+geom_abline(slope = 1, intercept = 0)+annotate(geom="text",x=5,y=-5,label=paste("r^2 =",r.squared))

F2S1b
## Warning: Removed 2 rows containing missing values (geom_point).

# ######
# #save figure 2a as pdf
# pdf("../../figures/Fig2S1b.pdf", width = 4, height = 4)
# F2S1b
# dev.off()
# 
# ######

p value QQplot and effect size boxplot RNA vs. TE

# RNA vs TE
# HvC

TE.HvC.sorted.logP<--log10(sort(HvC.interaction.TE.pval))

RNA.HvC.sorted.logP<--log10(sort(HvC.RNA.pval))



expected <- c(1:length(HvC.interaction.TE.pval)) 
log.exp <- -(log10(expected / (length(expected)+1)))


plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 Pvalue)", ylab="Observed (-log10 Pvalue)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="human vs chimpanzee")
points(log.exp, TE.HvC.sorted.logP, pch=18, cex=.6, col="orange") 
points(log.exp, RNA.HvC.sorted.logP, pch=18, cex=.6, col="blue") 
legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=.6, bty="n", horiz=TRUE)

# ######
# #save figure 2c as pdf
# pdf("../../figures/Fig2c.pdf", width = 4, height = 4)
# plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 p-value)", ylab="Observed (-log10 p-value)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="human vs chimpanzee")
# points(log.exp, TE.HvC.sorted.logP, pch=18, cex=.6, col="orange") 
# points(log.exp, RNA.HvC.sorted.logP, pch=18, cex=.6, col="blue") 
# legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=.75, bty="n", horiz=TRUE)
# dev.off()
# 
# ######

#RvC
# RNA vs TE

TE.RvC.sorted.logP<--log10(sort(RvC.interaction.TE.pval))

RNA.RvC.sorted.logP<--log10(sort(RvC.RNA.pval))



expected <- c(1:length(RvC.interaction.TE.pval)) 
log.exp <- -(log10(expected / (length(expected)+1)))


plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 Pvalue)", ylab="Observed (-log10 Pvalue)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="rhesus vs chimpanzee")
points(log.exp, TE.RvC.sorted.logP, pch=18, cex=.6, col="orange") 
points(log.exp, RNA.RvC.sorted.logP, pch=18, cex=.6, col="blue") 
legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=.6, bty="n", horiz=TRUE)

# ######
# #save figure 2S2a as pdf
# pdf("../../figures/Fig2S2a.pdf", width = 4, height = 4)
# plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 p-value)", ylab="Observed (-log10 p-value)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="rhesus vs chimpanzee")
# points(log.exp, TE.RvC.sorted.logP, pch=18, cex=.6, col="orange") 
# points(log.exp, RNA.RvC.sorted.logP, pch=18, cex=.6, col="blue") 
# legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=0.75, bty="n", horiz=TRUE)
# dev.off()
# 
# ######


#HvR
# RNA vs TE

TE.HvR.sorted.logP<--log10(sort(HvR.interaction.TE.pval))

RNA.HvR.sorted.logP<--log10(sort(HvR.RNA.pval))



expected <- c(1:length(HvR.interaction.TE.pval)) 
log.exp <- -(log10(expected / (length(expected)+1)))


plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 Pvalue)", ylab="Observed (-log10 Pvalue)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="rhesus vs human")
points(log.exp, TE.HvR.sorted.logP, pch=18, cex=.6, col="orange") 
points(log.exp, RNA.HvR.sorted.logP, pch=18, cex=.6, col="blue") 
legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=.6, bty="n", horiz=TRUE)

# ######
# #save figure 2S2b as pdf
# pdf("../../figures/Fig2S2b.pdf", width = 4, height = 4)
# plot(c(0,7), c(0,7), col="red", lwd=2, type="l", xlab="Expected (-log10 p-value)", ylab="Observed (-log10 p-value)", xlim=c(0,4), ylim=c(0,20), las=1, xaxs="i", yaxs="i", bty="l", main="rhesus vs human")
# points(log.exp, TE.HvR.sorted.logP, pch=18, cex=.6, col="orange") 
# points(log.exp, RNA.HvR.sorted.logP, pch=18, cex=.6, col="blue") 
# legend("topleft",c("RNA","TE"),col = c("blue","orange"),pch=18,cex=0.75, bty="n", horiz=TRUE)
# dev.off()
# 
# ######




# absolute log2 fold change

# ID protein level divergent genes

species.label <- substring(colnames(protein.expressed.data),1,1)

design <- model.matrix(~species.label)
colnames(design)<-c("Int","Human","Rhesus")


protein.voom.fit <- lmFit(protein.expressed.data,design = design)
HR.contrast <- makeContrasts(Human-Rhesus,levels = design)
protein.voom.HR.fit <- contrasts.fit(protein.voom.fit,HR.contrast)

protein.voom.fit <- eBayes(protein.voom.fit)
protein.voom.HR.fit <- eBayes(protein.voom.HR.fit)



HvC.protein.effect<-protein.voom.fit$coefficient[,2]
RvC.protein.effect<-protein.voom.fit$coefficient[,3]
HvR.protein.effect <- protein.voom.HR.fit$coefficient[,1]

HvC.protein.pval<-protein.voom.fit$p.value[,2]
RvC.protein.pval<-protein.voom.fit$p.value[,3]
HvR.protein.pval<-protein.voom.HR.fit$p.value[,1]



hist(HvC.protein.pval,breaks = 100)

hist(RvC.protein.pval,breaks = 100)

hist(HvR.protein.pval,breaks = 100)

HvC.protein.qval<-qvalue(HvC.protein.pval,fdr.level = 0.01)
RvC.protein.qval<-qvalue(RvC.protein.pval,fdr.level = 0.01)
HvR.protein.qval<-qvalue(HvR.protein.pval,fdr.level = 0.01)


wilcox.test(abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]), abs(HvC.ribo.effect[HvC.protein.qval$significant]))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]) and abs(HvC.ribo.effect[HvC.protein.qval$significant])
## W = 12430, p-value = 1.045e-14
## alternative hypothesis: true location shift is not equal to 0
boxplot(abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]),  abs(HvC.ribo.effect[HvC.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","ribo"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size in human-chimpanzee divergence")

wilcox.test(abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]), abs(HvC.RNA.effect[HvC.protein.qval$significant]))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]) and abs(HvC.RNA.effect[HvC.protein.qval$significant])
## W = 15033, p-value = 1.691e-08
## alternative hypothesis: true location shift is not equal to 0
boxplot(abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]),  abs(HvC.RNA.effect[HvC.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size in human-chimpanzee divergence")

wilcox.test(abs(RvC.interaction.TE.effect[RvC.protein.qval$significant]), abs(RvC.RNA.effect[RvC.protein.qval$significant]))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs(RvC.interaction.TE.effect[RvC.protein.qval$significant]) and abs(RvC.RNA.effect[RvC.protein.qval$significant])
## W = 440470, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
boxplot(abs(RvC.interaction.TE.effect[RvC.protein.qval$significant]),  abs(RvC.RNA.effect[RvC.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size in rhesus-chimpanzee divergence")

wilcox.test(abs(HvR.interaction.TE.effect[HvR.protein.qval$significant]), abs(HvR.RNA.effect[HvR.protein.qval$significant]))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs(HvR.interaction.TE.effect[HvR.protein.qval$significant]) and abs(HvR.RNA.effect[HvR.protein.qval$significant])
## W = 583410, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
boxplot(abs(HvR.interaction.TE.effect[HvR.protein.qval$significant]),  abs(HvR.RNA.effect[HvR.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size in human-rhesus divergence")

# ######
# #save figure 2d as pdf
# pdf("../../figures/Fig2d.pdf", width = 4, height = 4)
# boxplot(abs(HvC.interaction.TE.effect[HvC.protein.qval$significant]),  abs(HvC.RNA.effect[HvC.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 ratio", main="Effect size: human vs. chimpanzee")
# dev.off()
# 
# ######


# ######
# #save figure 2S3a as pdf
# pdf("../../figures/Fig2S3a.pdf", width = 4, height = 4)
# boxplot(abs(RvC.interaction.TE.effect[RvC.protein.qval$significant]),  abs(RvC.RNA.effect[RvC.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size: rhesus vs. chimpanzee")
# dev.off()
# 
# ######

# ######
# #save figure 2S3b as pdf
# pdf("../../figures/Fig2S3b.pdf", width = 4, height = 4)
# boxplot(abs(HvR.interaction.TE.effect[HvR.protein.qval$significant]),  abs(HvR.RNA.effect[HvR.protein.qval$significant]), outline = FALSE, notch = TRUE, names = c("TE","RNA"),col = c("dark orange","royal blue"), ylab="absolute log2 fold difference", main="Effect size: human vs. rhesus")
# dev.off()
# 
# ######

RNA-protein divergence

protein.expressed.weights <- as.data.frame(replicate(15,apply(RNA.expressed.weights,1,mean)))
colnames(protein.expressed.weights)<-colnames(protein.expressed.data)
rownames(protein.expressed.weights)<-rownames(protein.expressed.data)

RNA.protein.combined <- cbind(RNA.expressed.data, protein.expressed.data)
RNA.protein.weights <- cbind(RNA.expressed.weights, protein.expressed.weights)

species.label <- substring(colnames(RNA.protein.combined),1,1)
species.label <- toupper(species.label)
phenotype.label <- c(rep("rna",15),rep("protein",15))

design <- model.matrix(~species.label*phenotype.label)
colnames(design)<-c("Int","Human","Rhesus","RNA","Human.RNA","Rhesus.RNA")


proteinRNA.voom.fit <- lmFit(RNA.protein.combined,design = design,weights = RNA.protein.weights)
HR.contrast <- makeContrasts(Human.RNA-Rhesus.RNA,levels = design)
proteinRNA.voom.HR.fit <- contrasts.fit(proteinRNA.voom.fit,HR.contrast)

proteinRNA.voom.fit <- eBayes(proteinRNA.voom.fit)
proteinRNA.voom.HR.fit <- eBayes(proteinRNA.voom.HR.fit)


HvC.interaction.proteinRNA.effect<- -proteinRNA.voom.fit$coefficient[,5]
RvC.interaction.proteinRNA.effect<- -proteinRNA.voom.fit$coefficient[,6]
HvR.interaction.proteinRNA.effect <- -proteinRNA.voom.HR.fit$coefficient[,1]

HvC.interaction.proteinRNA.pval<-proteinRNA.voom.fit$p.value[,5]
RvC.interaction.proteinRNA.pval<-proteinRNA.voom.fit$p.value[,6]
HvR.interaction.proteinRNA.pval<-proteinRNA.voom.HR.fit$p.value[,1]



hist(HvC.interaction.proteinRNA.pval,breaks = 100)

hist(RvC.interaction.proteinRNA.pval,breaks = 100)

hist(HvR.interaction.proteinRNA.pval,breaks = 100)

proportion propagated downstream

cor(HvC.interaction.proteinRNA.effect,HvC.interaction.TE.effect)^2
## [1] 0.1341324
cor(RvC.interaction.proteinRNA.effect,RvC.interaction.TE.effect)^2
## [1] 0.009994688
cor(HvR.interaction.proteinRNA.effect,HvR.interaction.TE.effect)^2
## [1] 0.0650518
r.squared<-format(cor(HvC.interaction.TE.effect,HvC.interaction.proteinRNA.effect)^2, digits = 2)

# color by FWER value < 0.05
point.color <- cut(HvC.FWER,breaks = c (0,0.05,1), labels = c("blue","grey50"))

ggplot(mapping = aes(x=HvC.interaction.TE.effect, y=HvC.interaction.proteinRNA.effect))+geom_point(size=1, alpha=0.5, col=point.color)+xlab("log2(human/chimpanzee) ribo/RNA")+ ylab("log2(human/chimpanzee) protein/RNA")+ggtitle("human vs. chimpanzee")+scale_x_continuous(limits = c(-8, 8))+scale_y_continuous(limits = c(-8, 8))+theme_bw()+geom_abline(slope = 1, intercept = 0)+annotate(geom="text",x=5,y=-5,label=paste("r^2 =",r.squared))

# protein cutoffs

# HvC
cutoffs<- seq(0,1,0.01)
r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(HvC.ribo.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]],HvC.protein.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]])^2}

plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff",main = "HvC: Proportion of divergence in translation propagate to protein levels")

r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(HvC.interaction.TE.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]],HvC.interaction.proteinRNA.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]])^2}

points(cutoffs,r.squared, col ="red",pch=18)

legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")

# ######
# ##save figure 2e as pdf
# pdf("../../figures/Fig2e.pdf", width = 6, height = 6)
# 
# # HvC
# cutoffs<- seq(0,1,0.05)
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(HvC.ribo.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]],HvC.protein.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff", ylab = "Proportion of variance explained (r^2)",main = "HvC: Proportion of TE divergence propagate to proteins")
# 
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(HvC.interaction.TE.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]],HvC.interaction.proteinRNA.effect[qvalue(HvC.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# points(cutoffs,r.squared, col ="red",pch=18)
# 
# legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")
# 
# 
# dev.off()
# ######


# RvC
cutoffs<- seq(0,1,0.05)
r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(RvC.ribo.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]],RvC.protein.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]])^2}

plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff",main = "RvC: Proportion of divergence in translation propagate to protein levels")

r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(RvC.interaction.TE.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]],RvC.interaction.proteinRNA.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]])^2}

points(cutoffs,r.squared, col ="red",pch=18)

legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")

# ######
# ##save figure 2S4a as pdf
# pdf("../../figures/Fig2S4a.pdf", width = 6, height = 6)
# 
# # RvC
# cutoffs<- seq(0,1,0.05)
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(RvC.ribo.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]],RvC.protein.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff", ylab = "Proportion of variance explained (r^2)",main = "RvC: Proportion of TE divergence propagate to proteins")
# 
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(RvC.interaction.TE.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]],RvC.interaction.proteinRNA.effect[qvalue(RvC.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# points(cutoffs,r.squared, col ="red",pch=18)
# 
# legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")
# 
# 
# dev.off()
# ######

# HvR
cutoffs<- seq(0,1,0.05)
r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(HvR.ribo.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]],HvR.protein.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]])^2}

plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff",main = "HvR: Proportion of divergence in translation propagate to protein levels")

r.squared <- c()
for (i in 1:length(cutoffs)){
  r.squared[i] <- cor(HvR.interaction.TE.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]],HvR.interaction.proteinRNA.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]])^2}

points(cutoffs,r.squared, col ="red",pch=18)

legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")

# ######
# ##save figure 2S4b as pdf
# pdf("../../figures/Fig2S4b.pdf", width = 6, height = 6)
# 
# # HvR
# cutoffs<- seq(0,1,0.05)
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(HvR.ribo.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]],HvR.protein.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# plot(cutoffs,r.squared, ylim = c(0,1), pch=18, xlab = "FDR cutoff", ylab = "Proportion of variance explained (r^2)",main = "HvR: Proportion of TE divergence propagate to proteins")
# 
# r.squared <- c()
# for (i in 1:length(cutoffs)){
#   r.squared[i] <- cor(HvR.interaction.TE.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]],HvR.interaction.proteinRNA.effect[qvalue(HvR.protein.pval)$qvalues<cutoffs[i]])^2}
# 
# points(cutoffs,r.squared, col ="red",pch=18)
# 
# legend("topright", c("transcription not accounted", "transcription accounted"), pch=18,col = c("black","red"), bty = "n")
# 
# 
# dev.off()
# ######